/*
 * Copyright (c) 2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#if CONFIG_PGTRACE
#include <kern/debug.h>
#include <kern/clock.h>
#include <pexpert/pexpert.h>
#include <arm/pmap.h>
#include "pgtrace_decoder.h"

//-------------------------------------------------------------------
// Macros
//
// DBG selects how the helpers marked INLINE are compiled: with DBG == 1 they
// are forced out of line (noinline); with any other value they are declared
// plain `inline`. (Presumably noinline keeps the helpers visible as separate
// functions while debugging -- the reason is not stated in this file.)
#define DBG     1
#if DBG == 1
#define INLINE  __attribute__((noinline))
#else
#define INLINE  inline
#endif  

// Extract bits [msb:lsb] (inclusive, bit 0 = least significant) of a 32-bit
// value. The value is converted to uint32_t so the shifts are always unsigned
// 32-bit shifts, and every macro argument is parenthesized so expression
// arguments (e.g. BITS(x, hi + 1, lo)) expand correctly.
#define BITS(v, msb, lsb)    (((uint32_t)(v) << (31 - (msb))) >> (31 - (msb)) >> (lsb))
// Read the 64-bit general-purpose register number `n` from the saved state
// `ss` into `v`. Register number 31 reads as zero; any larger number panics.
// `n` is parenthesized everywhere so an expression argument cannot change the
// meaning of the range checks.
#define READ_GPR_X(ss, n, v) { \
    if (__builtin_expect((n) < 31, 1)) (v) = (ss)->ss_64.x[(n)]; \
    else if ((n) == 31) (v) = 0; \
    else { panic("Invalid GPR x%d", (n)); __builtin_unreachable(); } \
}
// Read the 32-bit (W) view of general-purpose register number `n` from the
// saved state `ss` into `v`: the low 32 bits of the saved 64-bit slot.
// Register number 31 reads as zero; any larger number panics.
// The value is obtained by truncating the 64-bit slot rather than by reading
// it through a uint32_t pointer, and `n` is parenthesized everywhere.
#define READ_GPR_W(ss, n, v) { \
    if (__builtin_expect((n) < 31, 1)) (v) = (uint32_t)(ss)->ss_64.x[(n)]; \
    else if ((n) == 31) (v) = 0; \
    else { panic("Invalid GPR w%d", (n)); __builtin_unreachable(); } \
}
// Write `v` to the 64-bit general-purpose register number `n` in the saved
// state `ss`. A write to register number 31 is discarded; any larger number
// panics. `n` is parenthesized everywhere so an expression argument cannot
// change the meaning of the range checks.
#define WRITE_GPR_X(ss, n, v) { \
    if (__builtin_expect((n) < 31, 1)) (ss)->ss_64.x[(n)] = (v); \
    else if ((n) == 31) {} \
    else { panic("Invalid GPR x%d", (n)); __builtin_unreachable(); } \
}
// Write `v` to the 32-bit (W) view of general-purpose register number `n` in
// the saved state `ss`. A write to register number 31 is discarded; any larger
// number panics.
// The value is truncated to 32 bits and stored zero-extended into the whole
// 64-bit slot, matching the AArch64 rule that a write to a W register clears
// the upper 32 bits of the corresponding X register (previously the upper half
// of the saved slot kept its stale contents).
#define WRITE_GPR_W(ss, n, v) { \
    if (__builtin_expect((n) < 31, 1)) (ss)->ss_64.x[(n)] = (uint32_t)(v); \
    else if ((n) == 31) {} \
    else { panic("Invalid GPR w%d", (n)); __builtin_unreachable(); } \
}
// Sign-extend the low `width` bits of `val` to a 64-bit signed value
// (1 <= width <= 64). The left shift is done on an unsigned value so that
// moving bits into the sign position is well defined; the right shift is then
// performed on int64_t to replicate the sign bit.
#define SIGN_EXTEND_64(val, width)  ((int64_t)((uint64_t)(val) << (64 - (width))) >> (64 - (width)))
// Keep the low `width` bits of `val` and clear everything above them,
// yielding an unsigned 64-bit value (1 <= width <= 64).
#define ZERO_EXTEND_64(val, width)  ((uint64_t)(val) & (~(uint64_t)0 >> (64 - (width))))

//-------------------------------------------------------------------
// Types
//
// Signature shared by the run_* handlers stored in typetbl: each receives the
// 32-bit instruction word, a physical and a virtual address, the saved
// register state, and a result record to fill in. The handlers visible in this
// file return 0.
typedef int (*run_t)(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);

// Output record of the get_info_* callbacks (see get_info_t).
// NOTE(review): presumably `addr` is the address the instruction accesses and
// `bytes` the size of that access -- confirm against the get_info_*
// implementations, which are not visible here.
typedef struct {
    vm_offset_t addr;
    uint64_t    bytes;
} instruction_info_t;

// Signature shared by the get_info_* callbacks stored in typetbl: each takes
// the instruction word and the saved register state and fills in *info.
// NOTE(review): the meaning of the bool result is not visible in this part of
// the file -- presumably true on success.
typedef bool (*get_info_t)(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);

// One row of the instruction-class table (typetbl): a mask/value pair
// identifying an encoding class plus the two callbacks that handle it.
// NOTE(review): presumably an instruction belongs to the class when
// (inst & mask) == value; the lookup code is outside this view.
typedef struct {
    uint32_t mask;
    uint32_t value;
    run_t run;
    get_info_t get_info;
} type_entry_t;

//-------------------------------------------------------------------
// Statics
//
// Forward declarations of the per-class handlers referenced by typetbl below:
// one run_* (matching run_t) and one get_info_* (matching get_info_t) per
// load/store encoding class, plus a shared pair for the AdvSIMD classes.
static int run_simd(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c335(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c336(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c337(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c338(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c339(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3310(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3311(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3312(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3313(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3314(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3315(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static int run_c3316(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res);
static bool get_info_simd(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c335(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c336(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c337(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c338(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c339(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3310(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3311(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3312(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3313(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3314(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3315(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);
static bool get_info_c3316(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info);

// Table from ARM DDI 0487A.a C3.3
// Each row is { mask, value, run handler, get_info handler } for one
// load/store encoding class; the trailing comment names the class. The four
// AdvSIMD classes all share run_simd/get_info_simd (run_simd panics via
// CANNOTDECODE).
// NOTE(review): rows are presumably matched with (inst & mask) == value by a
// lookup routine that is outside this view.
static type_entry_t typetbl[] = {
    { 0x3f000000, 0x08000000, run_c336, get_info_c336 },     // Load/store exclusive
    { 0x3b000000, 0x18000000, run_c335, get_info_c335 },     // Load register (literal)
    { 0x3b800000, 0x28000000, run_c337, get_info_c337 },     // Load/store no-allocate pair (offset)
    { 0x3b800000, 0x28800000, run_c3315, get_info_c3315 },   // Load/store register pair (post-indexed)
    { 0x3b800000, 0x29000000, run_c3314, get_info_c3314 },   // Load/store register pair (offset)
    { 0x3b800000, 0x29800000, run_c3316, get_info_c3316 },   // Load/store register pair (pre-indexed)
    { 0x3b200c00, 0x38000000, run_c3312, get_info_c3312 },   // Load/store register (unscaled immediate)
    { 0x3b200c00, 0x38000400, run_c338, get_info_c338 },     // Load/store register (immediate post-indexed)
    { 0x3b200c00, 0x38000800, run_c3311, get_info_c3311 },   // Load/store register (unprivileged)
    { 0x3b200c00, 0x38000c00, run_c339, get_info_c339 },     // Load/store register (immediate pre-indexed)
    { 0x3b200c00, 0x38200800, run_c3310, get_info_c3310 },   // Load/store register (register offset)
    { 0x3b000000, 0x39000000, run_c3313, get_info_c3313 },   // Load/store register (unsigned immediate)

    { 0xbfbf0000, 0x0c000000, run_simd, get_info_simd },     // AdvSIMD load/store multiple structures
    { 0xbfa00000, 0x0c800000, run_simd, get_info_simd },   // AdvSIMD load/store multiple structures (post-indexed)
    { 0xbf980000, 0x0d000000, run_simd, get_info_simd },   // AdvSIMD load/store single structure
    { 0xbf800000, 0x0d800000, run_simd, get_info_simd }    // AdvSIMD load/store single structure (post-indexed)
};

// File-local statistics; the do_* and run_* routines below increment the
// matching counters in stats.stat_decoder each time they run.
static pgtrace_stats_t stats;

/*
 * Emulate a store of general-purpose register Rt to va.
 *
 * size  access width in bytes: 1, 2, 4 or 8 (anything else panics)
 * Rt    source register number, read from the saved state ss
 * va    address the store instruction is issued against
 * res   result record: rr_rw is set to PGTRACE_RW_STORE and the register
 *       value is logged in rr_addrdata[0].ad_data; rr_num and ad_addr are
 *       not touched here.
 *
 * Bumps stats.stat_decoder.sd_str.
 */
INLINE static void do_str(uint8_t size, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_STORE;

    // Fetch the source register at the width the access needs and log it.
    if (size != 8) {
        READ_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        READ_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    // Issue the store with the instruction that matches the access width.
    switch (size) {
        case 1:
            __asm__ volatile("strb %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 2:
            __asm__ volatile("strh %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 4:
            __asm__ volatile("str %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 8:
            __asm__ volatile("str %x[xt], [%[va]]\n" :: [xt] "r"(xt), [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d\n", __func__, size);
    }

    stats.stat_decoder.sd_str++;
}

/*
 * Emulate a zero-extending load from va into general-purpose register Rt.
 *
 * size  access width in bytes: 1, 2, 4 or 8 (anything else panics)
 * Rt    destination register number, written in the saved state ss
 * va    address the load instruction is issued against
 * res   result record: rr_rw is set to PGTRACE_RW_LOAD and the loaded value
 *       is logged in rr_addrdata[0].ad_data; rr_num and ad_addr are not
 *       touched here.
 *
 * Bumps stats.stat_decoder.sd_ldr.
 */
INLINE static void do_ldr(uint8_t size, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_LOAD;

    // Issue the load with the instruction that matches the access width.
    switch (size) {
        case 1:
            __asm__ volatile("ldrb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 2:
            __asm__ volatile("ldrh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 4:
            __asm__ volatile("ldr %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 8:
            __asm__ volatile("ldr %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d\n", __func__, size);
    }

    // Commit the result to the saved register file and log it.
    if (size != 8) {
        WRITE_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        WRITE_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    stats.stat_decoder.sd_ldr++;
}

/*
 * Emulate a store-pair of registers Rt and Rt2 to va.
 *
 * size     width of each register in bytes: 4 or 8 (anything else panics)
 * Rt, Rt2  source register numbers, read from the saved state ss
 * va       address the stp instruction is issued against
 * res      result record: rr_rw is set to PGTRACE_RW_STORE, both register
 *          values are logged in rr_addrdata[0..1].ad_data, and
 *          rr_addrdata[1].ad_addr is set to va plus one register width.
 *          rr_num and rr_addrdata[0].ad_addr are not touched here.
 *
 * Bumps stats.stat_decoder.sd_stp.
 */
INLINE static void do_stp(uint8_t size, uint8_t Rt, uint8_t Rt2, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt1, wt2;
    uint64_t xt1, xt2;

    switch (size) {
        case 4:
            READ_GPR_W(ss, Rt, wt1);
            READ_GPR_W(ss, Rt2, wt2);
            __asm__ volatile("stp %w[wt1], %w[wt2], [%[va]]\n" :: [wt1] "r"(wt1), [wt2] "r"(wt2), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[1].ad_addr = va + sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 8:
            READ_GPR_X(ss, Rt, xt1);
            READ_GPR_X(ss, Rt2, xt2);
            __asm__ volatile("stp %x[xt1], %x[xt2], [%[va]]\n" :: [xt1] "r"(xt1), [xt2] "r"(xt2), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[1].ad_addr = va + sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            res->rr_addrdata[1].ad_data = xt2;
            break;
        default:
            panic("%s Invalid size %d\n", __func__, size);
    }

    stats.stat_decoder.sd_stp++;
}

/*
 * Emulate a load-pair from va into registers Rt and Rt2.
 *
 * size     width of each register in bytes: 4 or 8 (anything else panics)
 * Rt, Rt2  destination register numbers, written in the saved state ss
 * va       address the ldp instruction is issued against
 * res      result record: rr_rw is set to PGTRACE_RW_LOAD, both loaded
 *          values are logged in rr_addrdata[0..1].ad_data, and
 *          rr_addrdata[1].ad_addr is set to va plus one register width.
 *          rr_num and rr_addrdata[0].ad_addr are not touched here.
 *
 * Bumps stats.stat_decoder.sd_ldp.
 */
INLINE static void do_ldp(uint8_t size, uint8_t Rt, uint8_t Rt2, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt1, wt2;
    uint64_t xt1, xt2;

    if (size == 4) {
        __asm__ volatile("ldp %w[wt1], %w[wt2], [%[va]]\n" : [wt1] "=r"(wt1), [wt2] "=r"(wt2) : [va] "r"(va));
        WRITE_GPR_W(ss, Rt, wt1);
        WRITE_GPR_W(ss, Rt2, wt2);
        // This is a load: it was previously reported as PGTRACE_RW_STORE.
        res->rr_rw = PGTRACE_RW_LOAD;
        res->rr_addrdata[1].ad_addr = va+sizeof(wt1);
        res->rr_addrdata[0].ad_data = wt1;
        res->rr_addrdata[1].ad_data = wt2;
    } else if (size == 8) {
        __asm__ volatile("ldp %x[xt1], %x[xt2], [%[va]]\n" : [xt1] "=r"(xt1), [xt2] "=r"(xt2) : [va] "r"(va));
        WRITE_GPR_X(ss, Rt, xt1);
        WRITE_GPR_X(ss, Rt2, xt2);
        // This is a load: it was previously reported as PGTRACE_RW_STORE.
        res->rr_rw = PGTRACE_RW_LOAD;
        res->rr_addrdata[1].ad_addr = va+sizeof(xt1);
        res->rr_addrdata[0].ad_data = xt1;
        res->rr_addrdata[1].ad_data = xt2;
    } else panic("%s Invalid size %d\n", __func__, size);

    stats.stat_decoder.sd_ldp++;
}

/*
 * Emulate LDPSW: load two 32-bit words from va, sign-extended into the
 * 64-bit registers Rt and Rt2 of the saved state ss.
 *
 * res: rr_rw is set to PGTRACE_RW_LOAD, the two extended values are logged in
 * rr_addrdata[0..1].ad_data, and rr_addrdata[1].ad_addr is set to
 * va + sizeof(uint32_t). rr_num and rr_addrdata[0].ad_addr are not touched.
 *
 * Bumps stats.stat_decoder.sd_ldpsw.
 */
INLINE static void do_ldpsw(uint8_t Rt, uint8_t Rt2, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint64_t first, second;

    __asm__ volatile("ldpsw %x[xt1], %x[xt2], [%[va]]\n" : [xt1] "=r"(first), [xt2] "=r"(second) : [va] "r"(va));

    // Commit both destinations in instruction order (Rt first, then Rt2).
    WRITE_GPR_X(ss, Rt, first);
    WRITE_GPR_X(ss, Rt2, second);

    res->rr_rw = PGTRACE_RW_LOAD;
    res->rr_addrdata[0].ad_data = first;
    res->rr_addrdata[1].ad_addr = va + sizeof(uint32_t);
    res->rr_addrdata[1].ad_data = second;

    stats.stat_decoder.sd_ldpsw++;
}

/*
 * Emulate a sign-extending load (ldrsb / ldrsh / ldrsw) from va into Rt.
 *
 * size     bytes read from memory: 1, 2 or 4
 * extsize  destination width in bytes: 4 (W register) or 8 (X register)
 *          Supported (size, extsize) pairs: (1,4) (1,8) (2,4) (2,8) (4,8);
 *          any other combination panics.
 * Rt       destination register number, written in the saved state ss
 * res      rr_rw is set to PGTRACE_RW_LOAD and the extended value is logged
 *          in rr_addrdata[0].ad_data.
 *
 * Bumps stats.stat_decoder.sd_ldrs.
 */
INLINE static void do_ldrs(uint8_t size, uint8_t extsize, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_LOAD;

    // Key on (memory size, destination width). extsize fills the low byte, so
    // two different pairs can never produce the same key.
    switch ((size << 8) | extsize) {
        case (1 << 8) | 4:
            __asm__ volatile("ldrsb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case (1 << 8) | 8:
            __asm__ volatile("ldrsb %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        case (2 << 8) | 4:
            __asm__ volatile("ldrsh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case (2 << 8) | 8:
            __asm__ volatile("ldrsh %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        case (4 << 8) | 8:
            __asm__ volatile("ldrsw %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d extsize=%d\n", __func__, size, extsize);
    }

    // Commit the result at the destination width and log it.
    if (extsize != 8) {
        WRITE_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        WRITE_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    stats.stat_decoder.sd_ldrs++;
}

/*
 * Emulate an unprivileged sign-extending load (ldtrsb / ldtrsh / ldtrsw)
 * from va into Rt.
 *
 * size     bytes read from memory: 1, 2 or 4
 * extsize  destination width in bytes: 4 (W register) or 8 (X register)
 *          Supported (size, extsize) pairs: (1,4) (1,8) (2,4) (2,8) (4,8);
 *          any other combination panics.
 * Rt       destination register number, written in the saved state ss
 * res      rr_rw is set to PGTRACE_RW_LOAD and the extended value is logged
 *          in rr_addrdata[0].ad_data.
 *
 * Bumps stats.stat_decoder.sd_ldtrs.
 */
INLINE static void do_ldtrs(uint8_t size, uint8_t extsize, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_LOAD;

    // Key on (memory size, destination width). extsize fills the low byte, so
    // two different pairs can never produce the same key.
    switch ((size << 8) | extsize) {
        case (1 << 8) | 4:
            __asm__ volatile("ldtrsb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case (1 << 8) | 8:
            __asm__ volatile("ldtrsb %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        case (2 << 8) | 4:
            __asm__ volatile("ldtrsh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case (2 << 8) | 8:
            __asm__ volatile("ldtrsh %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        case (4 << 8) | 8:
            __asm__ volatile("ldtrsw %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d extsize=%d\n", __func__, size, extsize);
    }

    // Commit the result at the destination width and log it.
    if (extsize != 8) {
        WRITE_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        WRITE_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    stats.stat_decoder.sd_ldtrs++;
}

/*
 * Emulate an unprivileged load (ldtrb / ldtrh / ldtr) from va into Rt.
 *
 * size  access width in bytes: 1, 2, 4 or 8 (anything else panics)
 * Rt    destination register number, written in the saved state ss
 * res   rr_rw is set to PGTRACE_RW_LOAD and the loaded value is logged in
 *       rr_addrdata[0].ad_data.
 *
 * Bumps stats.stat_decoder.sd_ldtr.
 */
INLINE static void do_ldtr(uint8_t size, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_LOAD;

    // Issue the load with the instruction that matches the access width.
    switch (size) {
        case 1:
            __asm__ volatile("ldtrb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 2:
            __asm__ volatile("ldtrh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 4:
            __asm__ volatile("ldtr %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            break;
        case 8:
            __asm__ volatile("ldtr %x[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d\n", __func__, size);
    }

    // Commit the result to the saved register file and log it.
    if (size != 8) {
        WRITE_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        WRITE_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    stats.stat_decoder.sd_ldtr++;
}

/*
 * Emulate an unprivileged store (sttrb / sttrh / sttr) of register Rt to va.
 *
 * size  access width in bytes: 1, 2, 4 or 8 (anything else panics)
 * Rt    source register number, read from the saved state ss
 * res   rr_rw is set to PGTRACE_RW_STORE and the register value is logged in
 *       rr_addrdata[0].ad_data.
 *
 * Bumps stats.stat_decoder.sd_sttr.
 */
INLINE static void do_sttr(uint8_t size, uint8_t Rt, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt;
    uint64_t xt;

    res->rr_rw = PGTRACE_RW_STORE;

    // Fetch the source register at the width the access needs and log it.
    if (size != 8) {
        READ_GPR_W(ss, Rt, wt);
        res->rr_addrdata[0].ad_data = wt;
    } else {
        READ_GPR_X(ss, Rt, xt);
        res->rr_addrdata[0].ad_data = xt;
    }

    // Issue the store with the instruction that matches the access width.
    switch (size) {
        case 1:
            __asm__ volatile("sttrb %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 2:
            __asm__ volatile("sttrh %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 4:
            __asm__ volatile("sttr %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            break;
        case 8:
            __asm__ volatile("sttr %x[xt], [%[va]]\n" :: [xt] "r"(xt), [va] "r"(va));
            break;
        default:
            panic("%s Invalid size %d\n", __func__, size);
    }

    stats.stat_decoder.sd_sttr++;
}

/*
 * Emulate a PRFM (prefetch) instruction against va.
 *
 * Rt   the 5-bit prefetch operation field (0..31); values with a named
 *      operation use the name, the others are issued by number. Anything
 *      above 31 panics.
 * res  rr_num is set to 0 and rr_rw to PGTRACE_RW_PREFETCH.
 *
 * Bumps stats.stat_decoder.sd_prfm.
 */
INLINE static void do_prfm(uint8_t Rt, vm_offset_t va, pgtrace_run_result_t *res)
{
    // The prefetch operation is part of the instruction text, so each value
    // needs its own literal instruction; the helper pastes the operand in.
#define PRFM_CASE(num, op) \
        case num: __asm__ volatile("prfm " op ", [%[va]]\n" : : [va] "r"(va)); break

    switch (Rt) {
        PRFM_CASE(0, "pldl1keep");
        PRFM_CASE(1, "pldl1strm");
        PRFM_CASE(2, "pldl2keep");
        PRFM_CASE(3, "pldl2strm");
        PRFM_CASE(4, "pldl3keep");
        PRFM_CASE(5, "pldl3strm");
        PRFM_CASE(6, "#6");
        PRFM_CASE(7, "#7");
        PRFM_CASE(8, "#8");
        PRFM_CASE(9, "#9");
        PRFM_CASE(10, "#10");
        PRFM_CASE(11, "#11");
        PRFM_CASE(12, "#12");
        PRFM_CASE(13, "#13");
        PRFM_CASE(14, "#14");
        PRFM_CASE(15, "#15");
        PRFM_CASE(16, "pstl1keep");
        PRFM_CASE(17, "pstl1strm");
        PRFM_CASE(18, "pstl2keep");
        PRFM_CASE(19, "pstl2strm");
        PRFM_CASE(20, "pstl3keep");
        PRFM_CASE(21, "pstl3strm");
        PRFM_CASE(22, "#22");
        PRFM_CASE(23, "#23");
        PRFM_CASE(24, "#24");
        PRFM_CASE(25, "#25");
        PRFM_CASE(26, "#26");
        PRFM_CASE(27, "#27");
        PRFM_CASE(28, "#28");
        PRFM_CASE(29, "#29");
        PRFM_CASE(30, "#30");
        PRFM_CASE(31, "#31");
        default:
            panic("%s Invalid Rt %d\n", __func__, Rt);
    }

#undef PRFM_CASE

    // A prefetch transfers no data, so there is nothing to log.
    res->rr_num = 0;
    res->rr_rw = PGTRACE_RW_PREFETCH;

    stats.stat_decoder.sd_prfm++;
}

// Panic with the calling function's name, the string literal `msg`, and the
// instruction word printed in hex. `msg` is pasted into the format string, so
// it must be a string literal. Used for encodings the decoder does not handle.
#define CANNOTDECODE(msg, inst) do {\
    panic("%s: " msg " inst=%x not supported yet\n", __func__, inst);\
} while (0)

/*
 * Handler for the AdvSIMD load/store classes in typetbl. These encodings are
 * not emulated: the function panics through CANNOTDECODE. Only `inst` is
 * used (for the panic message); the remaining parameters exist to match
 * run_t.
 */
static int run_simd(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    (void)pa;
    (void)va;
    (void)ss;
    (void)res;

    CANNOTDECODE("simd", inst);

    return 0;
}

/*
 * Handler for the "Load register (literal)" class.
 *
 * Decodes opc (bits 31:30), V (bit 26) and Rt (bits 4:0) and dispatches on
 * the 3-bit key (opc << 1) | V:
 *   0 -> 32-bit load         2 -> 64-bit load
 *   4 -> sign-extending 32-bit load into a 64-bit register
 *   6 -> prefetch (do_prfm resets rr_num to 0)
 *   1, 3, 5 -> SIMD forms, not supported (panic)
 *   7 -> unknown encoding (panic)
 *
 * Before dispatching, the result is primed with one entry whose address is
 * pa. Bumps stats.stat_decoder.sd_c335 and returns 0.
 */
static int run_c335(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t v = BITS(inst, 26, 26);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (opc << 1) | v;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
        case 0:
            do_ldr(4, Rt, va, ss, res);
            break;
        case 2:
            do_ldr(8, Rt, va, ss, res);
            break;
        case 4:
            do_ldrs(4, 8, Rt, va, ss, res);
            break;
        case 6:
            do_prfm(Rt, va, res);
            break;
        case 1:
        case 3:
        case 5:
            CANNOTDECODE("simd", inst);
            break;
        default:
            CANNOTDECODE("unknown", inst);
            break;
    }

    stats.stat_decoder.sd_c335++;

    return 0;
}

/*
 * Handler for the "Load/store exclusive" class (this also covers the
 * load-acquire / store-release forms ldar* / stlr*).
 *
 * Decodes size (bits 31:30), o2 (23), L (22), o1 (21), Rs (20:16), o0 (15),
 * Rt2 (14:10) and Rt (4:0), then dispatches on the 6-bit key
 *     (size << 4) | (o2 << 3) | (L << 2) | (o1 << 1) | o0
 * and replays the matching instruction against va, using register values
 * from / committing results to the saved state ss.
 *
 * res: rr_rw is set to LOAD or STORE; single-register forms log one entry
 * (address pa), pair forms log two entries. For store-exclusive forms the
 * status result is written to register Rs.
 * NOTE(review): the pair forms record va (not pa) as the logged address,
 * unlike the single-register forms; left unchanged -- confirm which is
 * intended.
 *
 * Unrecognised keys panic via CANNOTDECODE. Bumps
 * stats.stat_decoder.sd_c336 and returns 0.
 *
 * Fixes relative to the previous version:
 *  - stlrb / stlrh / stlr passed the source register as an asm *output*
 *    ("=r"), so the value read from the saved state was not guaranteed to
 *    reach the store; it is now an input operand.
 *  - 64-bit ldxp / ldaxp logged the second value into rr_addrdata[0],
 *    overwriting the first and leaving entry 1 unset.
 *  - the kprintf message ended in a literal 'n' instead of a newline.
 */
static int run_c336(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t ws, wt, wt1, wt2;
    uint64_t xt, xt1, xt2;
    uint32_t size = BITS(inst, 31, 30),
             o2 = BITS(inst, 23, 23),
             L = BITS(inst, 22, 22),
             o1 = BITS(inst, 21, 21),
             Rs = BITS(inst, 20, 16),
             o0 = BITS(inst, 15, 15),
             Rt2 = BITS(inst, 14, 10),
             Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 4) | (o2 << 3) | (L << 2) | (o1 << 1) | o0;

    kprintf("%s Load/store exclusive on device memory???\n", __func__);

    // Default result shape: one entry at pa. Pair forms override this below.
    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
        // ---- byte forms (size == 0) ----
        case 0:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stxrb %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 1:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stlxrb %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 4:
            __asm__ volatile("ldxrb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 5:
            __asm__ volatile("ldaxrb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 9:
            READ_GPR_W(ss, Rt, wt);
            // wt is the value being stored, so it must be an input operand.
            __asm__ volatile("stlrb %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0xd:
            __asm__ volatile("ldarb %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;

        // ---- halfword forms (size == 1) ----
        case 0x10:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stxrh %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x11:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stlxrh %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x14:
            __asm__ volatile("ldxrh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x15:
            __asm__ volatile("ldaxrh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x19:
            READ_GPR_W(ss, Rt, wt);
            // wt is the value being stored, so it must be an input operand.
            __asm__ volatile("stlrh %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x1d:
            __asm__ volatile("ldarh %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;

        // ---- 32-bit forms (size == 2) ----
        case 0x20:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stxr %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x21:
            READ_GPR_W(ss, Rt, wt);
            __asm__ volatile("stlxr %w[ws], %w[wt], [%[va]]\n" : [ws] "=r"(ws) : [wt] "r"(wt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x22:
            READ_GPR_W(ss, Rt, wt1);
            READ_GPR_W(ss, Rt2, wt2);
            __asm__ volatile("stxp %w[ws], %w[wt1], %w[wt2], [%[va]]\n" : [ws] "=r"(ws) : [wt1] "r"(wt1), [wt2] "r"(wt2), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 0x23:
            READ_GPR_W(ss, Rt, wt1);
            READ_GPR_W(ss, Rt2, wt2);
            __asm__ volatile("stlxp %w[ws], %w[wt1], %w[wt2], [%[va]]\n" : [ws] "=r"(ws) : [wt1] "r"(wt1), [wt2] "r"(wt2), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 0x24:
            __asm__ volatile("ldxr %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x25:
            __asm__ volatile("ldaxr %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x26:
            __asm__ volatile("ldxp %w[wt1], %w[wt2], [%[va]]\n" : [wt1] "=r"(wt1), [wt2] "=r"(wt2) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt1);
            WRITE_GPR_W(ss, Rt2, wt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 0x27:
            __asm__ volatile("ldaxp %w[wt1], %w[wt2], [%[va]]\n" : [wt1] "=r"(wt1), [wt2] "=r"(wt2) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt1);
            WRITE_GPR_W(ss, Rt2, wt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 0x29:
            READ_GPR_W(ss, Rt, wt);
            // wt is the value being stored, so it must be an input operand.
            __asm__ volatile("stlr %w[wt], [%[va]]\n" :: [wt] "r"(wt), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = wt;
            break;
        case 0x2d:
            __asm__ volatile("ldar %w[wt], [%[va]]\n" : [wt] "=r"(wt) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = wt;
            break;

        // ---- 64-bit forms (size == 3) ----
        case 0x30:
            READ_GPR_X(ss, Rt, xt);
            __asm__ volatile("stxr %w[ws], %[xt], [%[va]]\n" : [ws] "=r"(ws) : [xt] "r"(xt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = xt;
            break;
        case 0x31:
            READ_GPR_X(ss, Rt, xt);
            __asm__ volatile("stlxr %w[ws], %[xt], [%[va]]\n" : [ws] "=r"(ws) : [xt] "r"(xt), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = xt;
            break;
        case 0x32:
            READ_GPR_X(ss, Rt, xt1);
            READ_GPR_X(ss, Rt2, xt2);
            __asm__ volatile("stxp %w[ws], %[xt1], %[xt2], [%[va]]\n" : [ws] "=r"(ws) : [xt1] "r"(xt1), [xt2] "r"(xt2), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            res->rr_addrdata[1].ad_data = xt2;
            break;
        case 0x33:
            READ_GPR_X(ss, Rt, xt1);
            READ_GPR_X(ss, Rt2, xt2);
            __asm__ volatile("stlxp %w[ws], %[xt1], %[xt2], [%[va]]\n" : [ws] "=r"(ws) : [xt1] "r"(xt1), [xt2] "r"(xt2), [va] "r"(va));
            WRITE_GPR_W(ss, Rs, ws);
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            res->rr_addrdata[1].ad_data = xt2;
            break;
        case 0x34:
            __asm__ volatile("ldxr %[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = xt;
            break;
        case 0x35:
            __asm__ volatile("ldaxr %[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = xt;
            break;
        case 0x36:
            __asm__ volatile("ldxp %[xt1], %[xt2], [%[va]]\n" : [xt1] "=r"(xt1), [xt2] "=r"(xt2) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt1);
            WRITE_GPR_X(ss, Rt2, xt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            // Second value belongs in entry 1 (was written to entry 0).
            res->rr_addrdata[1].ad_data = xt2;
            break;
        case 0x37:
            __asm__ volatile("ldaxp %[xt1], %[xt2], [%[va]]\n" : [xt1] "=r"(xt1), [xt2] "=r"(xt2) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt1);
            WRITE_GPR_X(ss, Rt2, xt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = va;
            res->rr_addrdata[1].ad_addr = va+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            // Second value belongs in entry 1 (was written to entry 0).
            res->rr_addrdata[1].ad_data = xt2;
            break;
        case 0x39:
            READ_GPR_X(ss, Rt, xt);
            // xt is the value being stored, so it must be an input operand.
            __asm__ volatile("stlr %[xt], [%[va]]\n" :: [xt] "r"(xt), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_addrdata[0].ad_data = xt;
            break;
        case 0x3d:
            __asm__ volatile("ldar %[xt], [%[va]]\n" : [xt] "=r"(xt) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_addrdata[0].ad_data = xt;
            break;
        default:
            CANNOTDECODE("unknown", inst);
    }

    stats.stat_decoder.sd_c336++;

    return 0;
}

// Emulate a load/store no-allocate pair (offset) instruction (stnp/ldnp)
// on general-purpose registers.
//
//   inst - instruction word; opc/V/L select the variant, Rt/Rt2 the registers
//   pa   - address recorded in res for the first element of the pair
//   va   - address the stnp/ldnp is actually executed against
//   ss   - saved state the register operands are read from / written to
//   res  - receives the direction (rr_rw), the element count (rr_num) and
//          the address/data of both elements
//
// Returns 0. Encodings with V set and any other unhandled encoding are
// reported through CANNOTDECODE.
static int run_c337(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t wt1, wt2;
    uint64_t xt1, xt2;
    uint32_t opc = BITS(inst, 31, 30),
             V = BITS(inst, 26, 26),
             L = BITS(inst, 22, 22),
             Rt = BITS(inst, 4, 0),
             Rt2 = BITS(inst, 14, 10);
    uint8_t fields = (opc << 2) | (V << 1) | L;

    switch (fields) {
        case 0:
            // 32-bit store pair
            READ_GPR_W(ss, Rt, wt1);
            READ_GPR_W(ss, Rt2, wt2);
            __asm__ volatile("stnp %w[wt1], %w[wt2], [%[va]]\n" :: [wt1] "r"(wt1), [wt2] "r"(wt2), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = pa;
            res->rr_addrdata[1].ad_addr = pa+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 1:
            // 32-bit load pair: this is a load, so it must be recorded as one
            __asm__ volatile("ldnp %w[wt1], %w[wt2], [%[va]]\n" : [wt1] "=r"(wt1), [wt2] "=r"(wt2) : [va] "r"(va));
            WRITE_GPR_W(ss, Rt, wt1);
            WRITE_GPR_W(ss, Rt2, wt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = pa;
            res->rr_addrdata[1].ad_addr = pa+sizeof(wt1);
            res->rr_addrdata[0].ad_data = wt1;
            res->rr_addrdata[1].ad_data = wt2;
            break;
        case 2:
        case 3:
        case 6:
        case 7:
        case 10:
        case 11:
            CANNOTDECODE("simd", inst);
            // Never fall through into the 64-bit store below, even if
            // CANNOTDECODE were to return.
            break;
        case 8:
            // 64-bit store pair
            READ_GPR_X(ss, Rt, xt1);
            READ_GPR_X(ss, Rt2, xt2);
            __asm__ volatile("stnp %x[xt1], %x[xt2], [%[va]]\n" :: [xt1] "r"(xt1), [xt2] "r"(xt2), [va] "r"(va));
            res->rr_rw = PGTRACE_RW_STORE;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = pa;
            res->rr_addrdata[1].ad_addr = pa+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            res->rr_addrdata[1].ad_data = xt2;
            break;
        case 9:
            // 64-bit load pair: recorded as a load
            __asm__ volatile("ldnp %x[xt1], %x[xt2], [%[va]]\n" : [xt1] "=r"(xt1), [xt2] "=r"(xt2) : [va] "r"(va));
            WRITE_GPR_X(ss, Rt, xt1);
            WRITE_GPR_X(ss, Rt2, xt2);
            res->rr_rw = PGTRACE_RW_LOAD;
            res->rr_num = 2;
            res->rr_addrdata[0].ad_addr = pa;
            res->rr_addrdata[1].ad_addr = pa+sizeof(xt1);
            res->rr_addrdata[0].ad_data = xt1;
            res->rr_addrdata[1].ad_data = xt2;
            break;
        default:
            CANNOTDECODE("simd", inst);
    }

    stats.stat_decoder.sd_c337++;

    return 0;
}

// Run handler paired with get_info_c338 (load/store register, immediate
// post-indexed). Dispatches on size:V:opc to the str/ldr/ldrs helpers.
//
// res is primed with a single element at address pa before dispatching;
// encodings with V set are reported as "simd", anything else that is not
// handled as "unknown". Always returns 0.
static int run_c338(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_str(1, Rt, va, ss, res); break;
    case 0x01: do_ldr(1, Rt, va, ss, res); break;
    case 0x02: do_ldrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_str(2, Rt, va, ss, res); break;
    case 0x09: do_ldr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_str(4, Rt, va, ss, res); break;
    case 0x11: do_ldr(4, Rt, va, ss, res); break;
    case 0x12: do_ldrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_str(8, Rt, va, ss, res); break;
    case 0x19: do_ldr(8, Rt, va, ss, res); break;

    // V == 1
    case 0x04: case 0x05: case 0x06: case 0x07:
    case 0x0c: case 0x0d:
    case 0x14: case 0x15:
    case 0x1c: case 0x1d:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c338++;

    return 0;
}

// Run handler paired with get_info_c339 (load/store register, immediate
// pre-indexed). Dispatches on size:V:opc to the str/ldr/ldrs helpers.
//
// res is primed with a single element at address pa before dispatching;
// encodings with V set are reported as "simd", anything else that is not
// handled as "unknown". Always returns 0.
static int run_c339(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_str(1, Rt, va, ss, res); break;
    case 0x01: do_ldr(1, Rt, va, ss, res); break;
    case 0x02: do_ldrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_str(2, Rt, va, ss, res); break;
    case 0x09: do_ldr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_str(4, Rt, va, ss, res); break;
    case 0x11: do_ldr(4, Rt, va, ss, res); break;
    case 0x12: do_ldrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_str(8, Rt, va, ss, res); break;
    case 0x19: do_ldr(8, Rt, va, ss, res); break;

    // V == 1
    case 0x04: case 0x05: case 0x06: case 0x07:
    case 0x0c: case 0x0d:
    case 0x14: case 0x15:
    case 0x1c: case 0x1d:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c339++;

    return 0;
}

// Run handler paired with get_info_c3310 (load/store register, register
// offset). Dispatches on size:V:opc to the str/ldr/ldrs/prfm helpers.
//
// res is primed with a single element at address pa before dispatching;
// encodings with V set are reported as "simd", anything else that is not
// handled as "unknown". Always returns 0.
static int run_c3310(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_str(1, Rt, va, ss, res); break;
    case 0x01: do_ldr(1, Rt, va, ss, res); break;
    case 0x02: do_ldrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_str(2, Rt, va, ss, res); break;
    case 0x09: do_ldr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_str(4, Rt, va, ss, res); break;
    case 0x11: do_ldr(4, Rt, va, ss, res); break;
    case 0x12: do_ldrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_str(8, Rt, va, ss, res); break;
    case 0x19: do_ldr(8, Rt, va, ss, res); break;
    case 0x1a: do_prfm(Rt, va, res); break;

    // V == 1
    case 0x04: case 0x05: case 0x06: case 0x07:
    case 0x0c: case 0x0d:
    case 0x14: case 0x15:
    case 0x1c: case 0x1d:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3310++;

    return 0;
}

// Run handler paired with get_info_c3311 (load/store register,
// unprivileged). Dispatches on size:V:opc to the sttr/ldtr/ldtrs helpers.
//
// res is primed with a single element at address pa before dispatching;
// every encoding without a helper is reported as "unknown".
// Always returns 0.
static int run_c3311(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_sttr(1, Rt, va, ss, res); break;
    case 0x01: do_ldtr(1, Rt, va, ss, res); break;
    case 0x02: do_ldtrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldtrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_sttr(2, Rt, va, ss, res); break;
    case 0x09: do_ldtr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldtrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldtrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_sttr(4, Rt, va, ss, res); break;
    case 0x11: do_ldtr(4, Rt, va, ss, res); break;
    case 0x12: do_ldtrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_sttr(8, Rt, va, ss, res); break;
    case 0x19: do_ldtr(8, Rt, va, ss, res); break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3311++;

    return 0;
}

// Run handler paired with get_info_c3312 (load/store register, unscaled
// immediate). Dispatches on size:V:opc to the str/ldr/ldrs/prfm helpers.
//
// res is primed with a single element at address pa before dispatching;
// encodings with V set are reported as "simd", anything else that is not
// handled as "unknown". Always returns 0.
static int run_c3312(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_str(1, Rt, va, ss, res); break;
    case 0x01: do_ldr(1, Rt, va, ss, res); break;
    case 0x02: do_ldrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_str(2, Rt, va, ss, res); break;
    case 0x09: do_ldr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_str(4, Rt, va, ss, res); break;
    case 0x11: do_ldr(4, Rt, va, ss, res); break;
    case 0x12: do_ldrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_str(8, Rt, va, ss, res); break;
    case 0x19: do_ldr(8, Rt, va, ss, res); break;
    case 0x1a: do_prfm(Rt, va, res); break;

    // V == 1
    case 0x04: case 0x05: case 0x06: case 0x07:
    case 0x0c: case 0x0d:
    case 0x14: case 0x15:
    case 0x1c: case 0x1d:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3312++;

    return 0;
}

// Run handler paired with get_info_c3313 (load/store register, unsigned
// immediate). Dispatches on size:V:opc to the str/ldr/ldrs/prfm helpers.
//
// res is primed with a single element at address pa before dispatching;
// encodings with V set are reported as "simd", anything else that is not
// handled as "unknown". Always returns 0.
static int run_c3313(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint8_t fields = (size << 3) | (V << 2) | opc;

    res->rr_num = 1;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    // size == 0
    case 0x00: do_str(1, Rt, va, ss, res); break;
    case 0x01: do_ldr(1, Rt, va, ss, res); break;
    case 0x02: do_ldrs(1, 8, Rt, va, ss, res); break;
    case 0x03: do_ldrs(1, 4, Rt, va, ss, res); break;

    // size == 1
    case 0x08: do_str(2, Rt, va, ss, res); break;
    case 0x09: do_ldr(2, Rt, va, ss, res); break;
    case 0x0a: do_ldrs(2, 8, Rt, va, ss, res); break;
    case 0x0b: do_ldrs(2, 4, Rt, va, ss, res); break;

    // size == 2
    case 0x10: do_str(4, Rt, va, ss, res); break;
    case 0x11: do_ldr(4, Rt, va, ss, res); break;
    case 0x12: do_ldrs(4, 8, Rt, va, ss, res); break;

    // size == 3
    case 0x18: do_str(8, Rt, va, ss, res); break;
    case 0x19: do_ldr(8, Rt, va, ss, res); break;
    case 0x1a: do_prfm(Rt, va, res); break;

    // V == 1
    case 0x04: case 0x05: case 0x06: case 0x07:
    case 0x0c: case 0x0d:
    case 0x14: case 0x15:
    case 0x1c: case 0x1d:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3313++;

    return 0;
}

// Run handler paired with get_info_c3314 (load/store register pair,
// offset). Dispatches on opc:V:L to the stp/ldp/ldpsw helpers.
//
// res is primed with two elements, the first at address pa, before
// dispatching; encodings with V set are reported as "simd", anything else
// that is not handled as "unknown". Always returns 0.
static int run_c3314(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint32_t Rt2 = BITS(inst, 14, 10);
    uint8_t fields = (opc << 2) | (V << 1) | L;

    res->rr_num = 2;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    case 0: do_stp(4, Rt, Rt2, va, ss, res); break;
    case 1: do_ldp(4, Rt, Rt2, va, ss, res); break;
    case 5: do_ldpsw(Rt, Rt2, va, ss, res); break;
    case 8: do_stp(8, Rt, Rt2, va, ss, res); break;
    case 9: do_ldp(8, Rt, Rt2, va, ss, res); break;

    // V == 1
    case 2: case 3:
    case 6: case 7:
    case 10: case 11:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3314++;

    return 0;
}

// Run handler paired with get_info_c3315 (load/store register pair,
// post-indexed). Dispatches on opc:V:L to the stp/ldp/ldpsw helpers.
//
// res is primed with two elements, the first at address pa, before
// dispatching; encodings with V set are reported as "simd", anything else
// that is not handled as "unknown". Always returns 0.
static int run_c3315(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint32_t Rt2 = BITS(inst, 14, 10);
    uint8_t fields = (opc << 2) | (V << 1) | L;

    res->rr_num = 2;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    case 0: do_stp(4, Rt, Rt2, va, ss, res); break;
    case 1: do_ldp(4, Rt, Rt2, va, ss, res); break;
    case 5: do_ldpsw(Rt, Rt2, va, ss, res); break;
    case 8: do_stp(8, Rt, Rt2, va, ss, res); break;
    case 9: do_ldp(8, Rt, Rt2, va, ss, res); break;

    // V == 1
    case 2: case 3:
    case 6: case 7:
    case 10: case 11:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3315++;

    return 0;
}

// Run handler paired with get_info_c3316 (load/store register pair,
// pre-indexed). Dispatches on opc:V:L to the stp/ldp/ldpsw helpers.
//
// res is primed with two elements, the first at address pa, before
// dispatching; encodings with V set are reported as "simd", anything else
// that is not handled as "unknown". Always returns 0.
static int run_c3316(uint32_t inst, vm_offset_t pa, vm_offset_t va, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t Rt = BITS(inst, 4, 0);
    uint32_t Rt2 = BITS(inst, 14, 10);
    uint8_t fields = (opc << 2) | (V << 1) | L;

    res->rr_num = 2;
    res->rr_addrdata[0].ad_addr = pa;

    switch (fields) {
    case 0: do_stp(4, Rt, Rt2, va, ss, res); break;
    case 1: do_ldp(4, Rt, Rt2, va, ss, res); break;
    case 5: do_ldpsw(Rt, Rt2, va, ss, res); break;
    case 8: do_stp(8, Rt, Rt2, va, ss, res); break;
    case 9: do_ldp(8, Rt, Rt2, va, ss, res); break;

    // V == 1
    case 2: case 3:
    case 6: case 7:
    case 10: case 11:
        CANNOTDECODE("simd", inst);
        break;

    default:
        CANNOTDECODE("unknown", inst);
        break;
    }

    stats.stat_decoder.sd_c3316++;

    return 0;
}

// get_info-style handler that never succeeds: it reports the instruction
// through CANNOTDECODE("simd", ...) and returns false. Neither ss nor info
// is read or written.
static bool get_info_simd(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
#pragma unused(inst, ss, info)
    CANNOTDECODE("simd", inst);
    return false;
}

// load register (literal)
//
// Fills info with the size and address of a PC-relative literal load:
//   bytes = 1 << scale
//   addr  = saved pc + (sign-extended imm19 << 2)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false when opc:V is not an encoding handled here.
static bool get_info_c335(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t imm19 = BITS(inst, 23, 5);
    uint32_t fields = (opc << 1) | V;
    uint8_t scale;

    // opc == 3 with V == 1
    if (__builtin_expect(fields > 6, false)) {
        CANNOTDECODE("invalid", inst);
        return false;
    }

    if (V == 1 || opc <= 1) {
        // every V == 1 variant, plus opc 0 and 1 without V
        scale = 2 + opc;
    } else if (opc == 2) {
        // V == 0, opc == 2: same scale as opc == 0
        scale = 2;
    } else {
        // V == 0, opc == 3 has no data access handled here
        CANNOTDECODE("invalid", inst);
        return false;
    }

    info->bytes = 1 << scale;
    info->addr = ss->ss_64.pc + (SIGN_EXTEND_64(imm19, 19) << 2);

    return true;
}

// load/store exclusive
//
// Fills info with the size and address of a load/store exclusive access:
//   bytes = (1 << size) << o1   (doubled when o1 is set)
//   addr  = saved value of register Rn
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for the size:o2:L:o1:o0 combinations listed below.
static bool get_info_c336(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t o2 = BITS(inst, 23, 23);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t o1 = BITS(inst, 21, 21);
    uint32_t o0 = BITS(inst, 15, 15);
    uint32_t Rn = BITS(inst, 9, 5);
    // 6-bit selector, so the largest possible value is 63
    uint32_t fields = (size << 4) | (o2 << 3) | (L << 2) | (o1 << 1) | o0;

    switch (fields) {
    case 2 ... 3:
    case 6 ... 8:
    case 10 ... 12:
    case 14 ... 15:
    case 18 ... 19:
    case 22 ... 24:
    case 26 ... 28:
    case 30 ... 31:
    case 40:
    case 42 ... 44:
    case 46 ... 47:
    case 56:
    case 58 ... 60:
    case 62 ... 63:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    info->bytes = (1 << size) << o1;
    info->addr = ss->ss_64.x[Rn];

    return true;
}

// load/store no-allocate pair (offset)
//
// Fills info with the size and address of the pair access:
//   bytes = 2 * (1 << scale)
//   addr  = saved Rn + (sign-extended imm7 << scale)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false when opc:V:L is 4, 5 or >= 12.
bool get_info_c337(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t imm7 = BITS(inst, 21, 15);
    uint32_t Rn = BITS(inst, 9, 5);
    // 4-bit selector, so the largest possible value is 15
    uint32_t fields = (opc << 2) | (V << 1) | L;

    switch (fields) {
    case 4 ... 5:
    case 12 ... 15:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the whole opc selects the element size, otherwise only opc<1>
    uint8_t scale = (V == 1) ? (opc + 2) : (BITS(opc, 1, 1) + 2);

    // double since it's pair
    info->bytes = (1 << scale) * 2;
    info->addr = ss->ss_64.x[Rn] + (SIGN_EXTEND_64(imm7, 7) << scale);

    return true;
}

// load/store register (immediate post-indexed)
//
// Fills info with the size and address of the access:
//   bytes = 1 << scale
//   addr  = saved Rn (post-indexed, so the immediate is not applied)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for the size:V:opc combinations listed below.
static bool get_info_c338(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rn = BITS(inst, 9, 5);
    // 5-bit selector, so the largest possible value is 31
    uint32_t fields = (size << 3) | (V << 2) | opc;

    switch (fields) {
    case 14 ... 15:
    case 19:
    case 22 ... 23:
    case 26 ... 27:
    case 30 ... 31:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the scale is opc<1>:size, otherwise just size
    uint8_t scale = (V == 1) ? ((BITS(opc, 1, 1) << 2) | size) : size;

    info->bytes = 1 << scale;
    // post-indexed
    info->addr = ss->ss_64.x[Rn];

    return true;
}

// load/store register (immediate pre-indexed)
//
// Fills info with the size and address of the access:
//   bytes = 1 << scale
//   addr  = saved Rn + sign-extended imm9
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for the size:V:opc combinations listed below.
static bool get_info_c339(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t imm9 = BITS(inst, 20, 12);
    uint32_t Rn = BITS(inst, 9, 5);
    // 5-bit selector, so the largest possible value is 31
    uint32_t fields = (size << 3) | (V << 2) | opc;

    switch (fields) {
    case 14 ... 15:
    case 19:
    case 22 ... 23:
    case 26 ... 27:
    case 30 ... 31:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the scale is opc<1>:size, otherwise just size
    uint8_t scale = (V == 1) ? ((BITS(opc, 1, 1) << 2) | size) : size;

    info->bytes = 1 << scale;
    info->addr = ss->ss_64.x[Rn] + SIGN_EXTEND_64(imm9, 9);

    return true;
}

// load/store register (register offset)
//
// Fills info with the size and address of the access:
//   bytes = 1 << scale
//   addr  = saved Rn + (extend(Rm) << shift)
// where shift is scale when S is set and 0 otherwise, and the extension of
// Rm is chosen by `option` (0-3 zero-extend, 4-7 sign-extend, from
// 8 << option<1:0> bits).
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for size:V:opc combinations rejected below.
static bool get_info_c3310(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t Rm = BITS(inst, 20, 16);
    uint32_t option = BITS(inst, 15, 13);
    uint32_t S = BITS(inst, 12, 12);
    uint32_t Rn = BITS(inst, 9, 5);
    uint32_t fields = (size << 3) | (V << 2) | opc;
    uint32_t scale;

    if (__builtin_expect((14 <= fields && fields <= 15) ||
                         (19 == fields) ||
                         (22 <= fields && fields <= 23) ||
                         (27 == fields) ||
                         (30 <= fields), false)) {
        CANNOTDECODE("invalid", inst);
        return false;
    }

    if (V == 1) {
        // scale is the 3-bit value opc<1>:size, as in the other
        // load/store register decoders in this file
        scale = (BITS(opc, 1, 1) << 2) | size;
    } else {
        scale = size;
    }

    info->bytes = 1 << scale;

    // As an offset operand, register number 31 is the zero register
    // (cf. READ_GPR_X), not a slot of the saved state.
    uint64_t m = (Rm == 31) ? 0 : ss->ss_64.x[Rm];
    uint8_t shift = (S == 1 ? scale : 0);

    switch (option) {
    case 0 ... 3:
        info->addr = ss->ss_64.x[Rn] + (ZERO_EXTEND_64(m, 8 << option) << shift);
        break;
    case 4 ... 7:
        info->addr = ss->ss_64.x[Rn] + (SIGN_EXTEND_64(m, 8 << BITS(option, 1, 0)) << shift);
        break;
    default:
        CANNOTDECODE("invalid", inst);
        return false;
    }

    return true;
}

// load/store register (unprivileged)
//
// Fills info with the size and address of the access:
//   bytes = 1 << size
//   addr  = saved Rn + sign-extended imm9
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for the size:V:opc combinations listed below.
static bool get_info_c3311(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t imm9 = BITS(inst, 20, 12);
    uint32_t Rn = BITS(inst, 9, 5);
    // 5-bit selector, so the largest possible value is 31
    uint32_t fields = (size << 3) | (V << 2) | opc;

    switch (fields) {
    case 4 ... 7:
    case 12 ... 15:
    case 19 ... 23:
    case 26 ... 31:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    info->bytes = 1 << size;
    info->addr = ss->ss_64.x[Rn] + SIGN_EXTEND_64(imm9, 9);

    return true;
}

// load/store register (unscaled immediate)
//
// Fills info with the size and address of the access:
//   bytes = 1 << scale
//   addr  = saved Rn + sign-extended imm9
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for size:V:opc combinations rejected below.
static bool get_info_c3312(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t imm9 = BITS(inst, 20, 12);
    uint32_t Rn = BITS(inst, 9, 5);
    uint32_t fields = (size << 3) | (V << 2) | opc;
    uint32_t scale;

    if (__builtin_expect((14 <= fields && fields <= 15) ||
                         (19 == fields) ||
                         (22 <= fields && fields <= 23) ||
                         (27 == fields) ||
                         (30 <= fields), false)) {
        CANNOTDECODE("invalid", inst);
        return false;
    }

    if (V == 1) {
        // scale is the 3-bit value opc<1>:size
        scale = BITS(opc, 1, 1) << 2 | size;
    } else {
        scale = size;
    }

    // access size in bytes is a shift, not a comparison
    info->bytes = 1 << scale;
    info->addr = ss->ss_64.x[Rn] + SIGN_EXTEND_64(imm9, 9);

    return true;
}

// load/store register (unsigned immediate)
//
// Fills info with the size and address of the access:
//   bytes = 1 << scale
//   addr  = saved Rn + (zero-extended imm12 << scale)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false for the size:V:opc combinations listed below.
bool get_info_c3313(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t size = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t opc = BITS(inst, 23, 22);
    uint32_t imm12 = BITS(inst, 21, 10);
    uint32_t Rn = BITS(inst, 9, 5);
    // 5-bit selector, so the largest possible value is 31
    uint32_t fields = (size << 3) | (V << 2) | opc;

    switch (fields) {
    case 14 ... 15:
    case 19:
    case 22 ... 23:
    case 27:
    case 30 ... 31:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the scale is opc<1>:size, otherwise just size
    uint32_t scale = (V == 1) ? ((BITS(opc, 1, 1) << 2) | size) : size;

    info->bytes = 1 << scale;
    info->addr = ss->ss_64.x[Rn] + (ZERO_EXTEND_64(imm12, 12) << scale);

    return true;
}

// load/store register pair (offset)
//
// Fills info with the size and address of the pair access:
//   bytes = 2 * (1 << scale)
//   addr  = saved Rn + (sign-extended imm7 << scale)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false when opc:V:L is 4 or >= 12.
static bool get_info_c3314(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t imm7 = BITS(inst, 21, 15);
    uint32_t Rn = BITS(inst, 9, 5);
    // 4-bit selector, so the largest possible value is 15
    uint32_t fields = (opc << 2) | (V << 1) | L;

    switch (fields) {
    case 4:
    case 12 ... 15:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the whole opc selects the element size, otherwise only opc<1>
    uint8_t scale = (V == 1) ? (2 + opc) : (2 + BITS(opc, 1, 1));

    info->bytes = (1 << scale) * 2;
    info->addr = ss->ss_64.x[Rn] + (SIGN_EXTEND_64(imm7, 7) << scale);

    return true;
}

// load/store register pair (post-indexed)
//
// Fills info with the size and address of the pair access:
//   bytes = 2 * (1 << scale)
//   addr  = saved Rn (post-indexed, so the immediate is not applied)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false when opc:V:L is 4 or >= 12.
static bool get_info_c3315(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t Rn = BITS(inst, 9, 5);
    // 4-bit selector, so the largest possible value is 15
    uint32_t fields = (opc << 2) | (V << 1) | L;

    switch (fields) {
    case 4:
    case 12 ... 15:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the whole opc selects the element size, otherwise only opc<1>
    uint8_t scale = (V == 1) ? (2 + opc) : (2 + BITS(opc, 1, 1));

    info->bytes = (1 << scale) * 2;
    // post-indexed
    info->addr = ss->ss_64.x[Rn];

    return true;
}

// load/store register pair (pre-indexed)
//
// Fills info with the size and address of the pair access:
//   bytes = 2 * (1 << scale)
//   addr  = saved Rn + (sign-extended imm7 << scale)
// Returns true on success; reports the instruction through CANNOTDECODE and
// returns false when opc:V:L is 4 or >= 12.
static bool get_info_c3316(uint32_t inst, arm_saved_state_t *ss, instruction_info_t *info)
{
    uint32_t opc = BITS(inst, 31, 30);
    uint32_t V = BITS(inst, 26, 26);
    uint32_t L = BITS(inst, 22, 22);
    uint32_t imm7 = BITS(inst, 21, 15);
    uint32_t Rn = BITS(inst, 9, 5);
    // 4-bit selector, so the largest possible value is 15
    uint32_t fields = (opc << 2) | (V << 1) | L;

    switch (fields) {
    case 4:
    case 12 ... 15:
        CANNOTDECODE("invalid", inst);
        return false;
    default:
        break;
    }

    // with V set the whole opc selects the element size, otherwise only opc<1>
    uint8_t scale = (V == 1) ? (2 + opc) : (2 + BITS(opc, 1, 1));

    info->bytes = (1 << scale) * 2;
    info->addr = ss->ss_64.x[Rn] + (SIGN_EXTEND_64(imm7, 7) << scale);

    return true;
}


//-------------------------------------------------------------------
// Globals
//
// Decode `inst` and run it through the matching handler.
//
//   inst     - instruction word to decode
//   fva      - address compared against the address decoded from inst;
//              its page offset is reused for the access
//   cva_page - two page addresses: [0] the "front" page, [1] the "current"
//              page
//   ss       - saved register state handed to the decoder and run handler
//   res      - result record; rr_time is stamped here and the rest is
//              filled in by the run handler
//
// The first typetbl entry whose mask/value matches inst supplies the
// get_info and run handlers. Panics when the instruction cannot be decoded
// or when mmu_kvtop() yields 0 for the chosen address. Returns 0.
int pgtrace_decode_and_run(uint32_t inst, vm_offset_t fva, vm_map_offset_t *cva_page, arm_saved_state_t *ss, pgtrace_run_result_t *res)
{
    uint8_t len = sizeof(typetbl)/sizeof(type_entry_t);
    run_t run = NULL;
    get_info_t get_info = NULL;
    vm_offset_t pa, cva;
    vm_offset_t cva_front_page = cva_page[0];
    vm_offset_t cva_cur_page = cva_page[1];
    instruction_info_t info;
    
    for (uint8_t i = 0; i < len; i++) {
        if ((typetbl[i].mask & inst) == typetbl[i].value) {
            run = typetbl[i].run;
            get_info = typetbl[i].get_info;
            break;
        }
    }

    assert(run != NULL && get_info != NULL);

    // A failing get_info() returns before writing info, so info.addr must
    // not be consumed in that case.
    if (!get_info(inst, ss, &info)) {
        panic("%s: cannot decode inst=%x", __func__, inst);
    }

    if (info.addr == fva) {
        cva = cva_cur_page + (fva & ARM_PGMASK);
    } else {
        // which means a front page is not a tracing page
        cva = cva_front_page + (fva & ARM_PGMASK);
    }

    pa = mmu_kvtop(cva);
    if (!pa) {
        panic("%s: invalid address cva=%lx fva=%lx info.addr=%lx inst=%x", __func__, cva, fva, info.addr, inst);
    }

    absolutetime_to_nanoseconds(mach_absolute_time(), &res->rr_time);
    run(inst, pa, cva, ss, res);

    return 0;
}

// Copy the decoder statistics (stats.stat_decoder) into s->stat_decoder.
// Exactly sizeof(stats.stat_decoder) bytes are copied.
void pgtrace_decoder_get_stats(pgtrace_stats_t *s)
{
    memcpy((void *)&(s->stat_decoder), &(stats.stat_decoder), sizeof(stats.stat_decoder));
}
#endif
